import sys
import pandas as pd
import numpy as np
import math
import copy as cp
import sys
sys.path.append("..") # Adds higher directory to python modules path.
import disdrorain as dr
# import disdrorain_wspeed as drsp
_RD80_ = dr.disdrorain(datapath='../data/RD80_sample_data') # by default disdrometer class limits are RD80 classes
_RD69_ = dr.disdrorain(datapath='../data/RD69_sample_data',classpath='../data/RD69_celllimits') # load RD69 data with RD69 classe limits
_RD80_.classlimits
_RD69_.classlimits
_RD80_.data.head(5)
_RD69_.data.head(5)
Purporse: eliminate outliers counts. </br>
[60 157 121 124 68 67 74 44 14 10 18 11 0 2 0 0 1 0 0 0] is reduced to
[60 157 121 124 68 67 74 44 14 10 18 11 0 0 0 0 0 0 0 0]
# _RD80_clean and _RD69_clean are two new disdrorain class objects whose "data" have been processed by removing outlier counts
# RD80_summary and RD69_summary are tow data frames reporting the "impact" of the outlier procedure application
(_RD80_clean,RD80_summary) = _RD80_.outlier_deletion()
(_RD69_clean,RD69_summary) = _RD69_.outlier_deletion()
Let's look at how the first record of RD69 as been changed:
print(_RD69_.data.head(1).to_string(index=False))
print(_RD69_clean.data.head(1).to_string(index=False))
the counts from C10 to C20 have been set to zero. This mght seems "very bad", but these cases are very rare </br> Let's have a look at the summary data frames
print("RD69 sample report")
print("")
print("The original number of drops in the RD69 sample is: ", _RD69_.data.sum().sum())
print("After removing the outliers, the number of drops is: ", _RD69_clean.data.sum().sum())
print("We removed ", (_RD69_.data.sum().sum()-_RD69_clean.data.sum().sum()), " drops: ",\
round((_RD69_.data.sum().sum()-_RD69_clean.data.sum().sum())/_RD69_.data.sum().sum()*100,2),"% of the original drops")
print("")
print("The number of records affected is: ",RD69_summary.shape[0], \
" (=",round(RD69_summary.shape[0]/_RD69_.data.shape[0]*100,2), "% of the orginal number of records)")
print("")
print("To better quantify the impact, we can calculate the quantiles of percentage of drops removed for each affected record")
print(RD69_summary['perc_delta_drops'].quantile([0.01,0.05,0.25,0.5,0.95,0.75,0.99]))
print("RD80 sample report")
print("")
print("The original number of drops in the RD69 sample is: ", _RD80_.data.sum().sum())
print("After removing the outliers, the number of drops is: ", _RD80_clean.data.sum().sum())
print("We removed ", (_RD80_.data.sum().sum()-_RD80_clean.data.sum().sum()), " drops: ",\
round((_RD80_.data.sum().sum()-_RD80_clean.data.sum().sum())/_RD80_.data.sum().sum()*100,2),"% of the original drops")
print("")
print("The number of records affected is: ",RD80_summary.shape[0], \
" (=",round(RD80_summary.shape[0]/_RD80_.data.shape[0]*100,2), "% of the orginal number of records)")
print("")
print("To better quantify the impact, we can calculate the quantiles of percentage of drops removed for each affected record")
print(RD80_summary['perc_delta_drops'].quantile([0.01,0.05,0.25,0.5,0.95,0.75,0.99]))
# calculate bulk variables with the hypothesis that drop speed is a power law : v(D)=3.67*D^0.67
RD80_bulk_vplaw=_RD80_clean.bulk_variables()
RD69_bulk_vplaw=_RD69_clean.bulk_variables()
# calculate bulk variables with the hypothesis that drop speed is an "exponential plateau" : v(D)=9,65-10.3*exp(-0.6*D)
RD80_bulk_vexpo=_RD80_clean.bulk_variables(_speed_='expo')
RD69_bulk_vexpo=_RD69_clean.bulk_variables(_speed_='expo')
# r=R/N z=Z/Nv w=W/Nv
RD80_bulk_vplaw.head(5)
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file, output_notebook
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"
output_notebook()
p1 = figure( tools=TOOLS, title="RD69 -- v(D)=3.67*D**0.67", plot_width=400, plot_height=400, y_range=(-3,7), x_range=(-4,3))
p1.xaxis.axis_label="(log10(R),log10(r))"
p1.yaxis.axis_label="(log10(Z),log10(z))"
p3 = figure( tools=TOOLS, title="RD80 -- v(D)=3.67*D**0.67" ,plot_width=400, plot_height=400, y_range=(-3,7), x_range=(-4,3))
p3.xaxis.axis_label="(log10(R),log10(r))"
p3.yaxis.axis_label="(log10(Z),log10(z))"
p1.circle(np.log10(RD69_bulk_vplaw.R.values), np.log10(RD69_bulk_vplaw.Z.values), color="red", size=1, legend_label="(log10(Z),log10(R))")
p1.circle(np.log10(RD69_bulk_vplaw.r.values), np.log10(RD69_bulk_vplaw.z.values), color="cyan", size=1, legend_label="(log10(z),log10(r))")
p3.circle(np.log10(RD80_bulk_vplaw.R.values), np.log10(RD80_bulk_vplaw.Z.values), color="red", size=1, legend_label="(log10(Z),log10(R))")
p3.circle(np.log10(RD80_bulk_vplaw.r.values), np.log10(RD80_bulk_vplaw.z.values), color="cyan", size=1, legend_label="(log10(z),log10(r))")
p2 = figure( tools=TOOLS, title="RD69 -- v(D)=9,65-10.3*exp(-0.6*D)", plot_width=400, plot_height=400, y_range=(-3,7), x_range=(-4,3))
p2.xaxis.axis_label="(log10(R),log10(r))"
p2.yaxis.axis_label="(log10(Z),log10(z))"
p4 = figure( tools=TOOLS, title="RD80 -- v(D)=9,65-10.3*exp(-0.6*D)",plot_width=400, plot_height=400, y_range=(-3,7), x_range=(-4,3))
p4.xaxis.axis_label="(log10(R),log10(r))"
p4.yaxis.axis_label="(log10(Z),log10(z))"
p2.circle(np.log10(RD69_bulk_vexpo.R.values), np.log10(RD69_bulk_vexpo.Z.values), color="red", size=1, legend_label="(log10(Z),log10(R))")
p2.circle(np.log10(RD69_bulk_vexpo.r.values), np.log10(RD69_bulk_vexpo.z.values), color="cyan", size=1, legend_label="(log10(z),log10(r))")
p4.circle(np.log10(RD80_bulk_vexpo.R.values), np.log10(RD80_bulk_vexpo.Z.values), color="red", size=1, legend_label="(log10(Z),log10(R))")
p4.circle(np.log10(RD80_bulk_vexpo.r.values), np.log10(RD80_bulk_vexpo.z.values), color="cyan", size=1, legend_label="(log10(z),log10(r))")
p1.legend.location = "top_left"
p3.legend.location = "top_left"
p2.legend.location = "top_left"
p4.legend.location = "top_left"
grid = gridplot([[p1, p2,],[p3, p4]])
show(grid)
C=pow((0.005*60)/(6*math.pi*0.0001),2)
RD69_bulk_vplaw.loc[:,'zetap']=RD69_bulk_vplaw.z/pow(RD69_bulk_vplaw.r,2)/C
RD80_bulk_vplaw.loc[:,'zetap']=RD80_bulk_vplaw.z/pow(RD80_bulk_vplaw.r,2)/C
RD69_bulk_vexpo.loc[:,'zetap']=RD69_bulk_vexpo.z/pow(RD69_bulk_vexpo.r,2)/C
RD80_bulk_vexpo.loc[:,'zetap']=RD80_bulk_vexpo.z/pow(RD80_bulk_vexpo.r,2)/C
def make_zetap_cdf(_df_, _binisize_):
_df_.loc[:,'bin'] = np.floor(_df_.zetap/_binisize_)*_binisize_
_A_ = _df_.groupby('bin')['N'].count().to_frame().reset_index()
_A_.rename(columns={'N':'count'},inplace=True)
_A_.loc[:,'cdf'] = _A_['count'].cumsum()/_A_['count'].sum()
_A_.loc[-1] = [_A_['bin'].min()-_binisize_, 0, 0]
_A_.index = _A_.index + 1 # shifting index
_A_= _A_.sort_index() # sorting by index
return _A_
RD69_zetap_cdf_vplaw = make_zetap_cdf(RD69_bulk_vplaw,0.025)
RD80_zetap_cdf_vplaw = make_zetap_cdf(RD80_bulk_vplaw,0.025)
RD69_zetap_cdf_vexpo = make_zetap_cdf(RD69_bulk_vexpo,0.025)
RD80_zetap_cdf_vexpo = make_zetap_cdf(RD80_bulk_vexpo,0.025)
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file, output_notebook
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"
output_notebook()
p1 = figure( tools=TOOLS, title="RD69", plot_width=500, plot_height=400)
p2 = figure( tools=TOOLS, title="RD80", plot_width=500, plot_height=400)
p1.line(RD69_zetap_cdf_vplaw.loc[RD69_zetap_cdf_vplaw.cdf<1,:].bin.values,np.log10(1-RD69_zetap_cdf_vplaw.loc[RD69_zetap_cdf_vplaw.cdf<1,:].cdf.values), \
color="burlywood", line_width=4, legend_label="v(D)=3.67*D**0.67")
p1.line(RD69_zetap_cdf_vexpo.loc[RD69_zetap_cdf_vexpo.cdf<1,:].bin.values,np.log10(1-RD69_zetap_cdf_vexpo.loc[RD69_zetap_cdf_vexpo.cdf<1,:].cdf.values), \
color="darkturquoise", line_width=4, legend_label="v(D)=9,65-10.3*exp(-0.6*D)")
p2.line(RD80_zetap_cdf_vplaw.loc[RD80_zetap_cdf_vplaw.cdf<1,:].bin.values,np.log10(1-RD80_zetap_cdf_vplaw.loc[RD80_zetap_cdf_vplaw.cdf<1,:].cdf.values), \
color="burlywood", line_width=4, legend_label="v(D)=3.67*D**0.67")
p2.line(RD80_zetap_cdf_vexpo.loc[RD80_zetap_cdf_vexpo.cdf<1,:].bin.values,np.log10(1-RD80_zetap_cdf_vexpo.loc[RD80_zetap_cdf_vexpo.cdf<1,:].cdf.values), \
color="darkturquoise", line_width=4, legend_label="v(D)=9,65-10.3*exp(-0.6*D)")
p1.xaxis.axis_label="zeta_p"
p1.yaxis.axis_label="1-CDF(zeta_p)"
p2.xaxis.axis_label="zeta_p"
p2.yaxis.axis_label="1-CDF(zeta_p)"
grid = gridplot([[p1, p2,],])
show(grid)